import numpy
import math
import scipy
import scipy.io
import scipy.misc
import scipy.cluster
import scipy.cluster.vq
import matplotlib
import matplotlib.pyplot
import os
import IPython
import IPython.parallel
import itertools
import random
import sklearn
import sklearn.decomposition
import sklearn.manifold
import sklearn.cluster
import sklearn.feature_extraction
import sklearn.feature_extraction.text
# ---------------------------------------------------------------------------
# Data locations and inputs for the global-feature (codebook histogram) step.
# ---------------------------------------------------------------------------

# Root directory under which every data directory used below lives.
base_path = "/u/mlrobert/code/local/2013_drawing_assistant/data/"

# Directory-name components shared by the data directories below.  They are
# spelled once here so the three paths cannot drift apart; the resulting path
# strings are identical to the previously hand-pasted ones.
local_feature_config = (
    "apparent_ridges.num_latitude_lines=20.num_longitude_lines=20."
    "gabor.num_thetas=08.galif.patch_width=15.num_samples=32.num_tiles=04"
)
codebook_config = local_feature_config + ".num_samples=1000000.k=5000"

# Directory holding the local-feature cluster centroids (.mat file).
local_feature_cluster_centroids_path = \
    base_path + "local_feature_cluster_centroids/bunny2/" + codebook_config + "/"

# Directory holding one sub-directory of local features per image.
local_features_base_path = \
    base_path + "local_features/bunny2/" + local_feature_config + "/"

# Output directory: one global-feature .mat file per image is written here.
global_features_path = \
    base_path + "global_features/bunny2/" + codebook_config + "/"

# Create the output directory on first use.
if not os.path.exists(global_features_path):
    os.makedirs(global_features_path)

local_feature_cluster_centroids_path_name_ext = \
    local_feature_cluster_centroids_path + "local_feature_cluster_centroids.mat"

# Directory layout read below:
#   local_features_base_path/<image>/<theta>/<local feature file>
# The theta and local-feature listings are taken from the first image only.
img_names = sorted(os.listdir(local_features_base_path))
num_images = len(img_names)
assert num_images > 0, "no image directories found in local_features_base_path"

theta_names = sorted(os.listdir(local_features_base_path + img_names[0]))
num_thetas = len(theta_names)
assert num_thetas > 0, "no theta directories found for the first image"

local_feature_names = sorted(
    os.listdir(local_features_base_path + img_names[0] + "/" + theta_names[0]))
num_local_features = len(local_feature_names)
assert num_local_features > 0, "no local feature files found for the first image"

# Not referenced by the code visible in this file; kept so that anything else
# relying on the name keeps working.
num_samples = 1000000

# Load the codebook: one centroid per row.
local_feature_cluster_centroids_mat = \
    scipy.io.loadmat(local_feature_cluster_centroids_path_name_ext)
local_feature_cluster_centroids = \
    local_feature_cluster_centroids_mat["local_feature_cluster_centroids"]

# Single-argument call form prints the same text on Python 2 and Python 3.
print(local_feature_cluster_centroids.shape)
# Output recorded in the original listing: (5000, 128)
def compute_global_features(img_name):
    """Compute and save the codebook histogram (global feature) of one image.

    For every entry of ``local_feature_names`` the per-theta ``.mat`` files at
    ``local_features_base_path + img_name + "/" + <theta> + "/" + <name>`` are
    loaded (variable ``"tile"``), stacked over all thetas and flattened into
    one local feature vector.  Vectors whose Euclidean norm does not exceed
    the acceptance threshold are discarded.  Each remaining vector is assigned
    to its nearest row of ``local_feature_cluster_centroids`` and the number
    of vectors assigned to each centroid is counted.

    The counts are written to ``global_features_path + img_name + ".mat"``
    under the key ``"global_features"`` as a column vector with one entry per
    centroid.

    The function reads these names as globals: ``local_feature_names``,
    ``theta_names``, ``num_thetas``, ``local_features_base_path``,
    ``global_features_path`` and ``local_feature_cluster_centroids``.

    Args:
        img_name: name of the image sub-directory inside
            ``local_features_base_path``; also used as the output file stem.

    Returns:
        None.  The result is only written to disk.
    """
    # Imported inside the function (and no module-level helpers are used)
    # because the function is also sent to IPython.parallel engines, where
    # only the explicitly pushed globals are available.
    import numpy
    import scipy
    import scipy.io
    import scipy.cluster
    import scipy.cluster.vq

    local_feature_norm_accept_threshold = 0.01

    # One histogram bin per codebook entry; derived from the codebook itself
    # instead of being hard-coded so the two can never disagree.
    codebook_size = local_feature_cluster_centroids.shape[0]

    all_local_features = []
    for local_feature_name in local_feature_names:
        # Gather the responses of this local feature for every theta.
        local_features = []
        for theta_name in theta_names[:num_thetas]:
            local_feature_path = \
                local_features_base_path + \
                img_name + "/" + theta_name + "/" + local_feature_name
            local_feature_mat = scipy.io.loadmat(local_feature_path)
            local_features.append(local_feature_mat["tile"])

        local_features_1d = numpy.array(local_features).ravel()

        # Skip features whose norm does not exceed the threshold.
        if numpy.linalg.norm(local_features_1d) > local_feature_norm_accept_threshold:
            all_local_features.append(local_features_1d)

    if all_local_features:
        all_local_features_2d = numpy.array(all_local_features)
        all_local_features_2d_codebook_indices, distances = scipy.cluster.vq.vq(
            all_local_features_2d, local_feature_cluster_centroids)

        # Count occurrences per codebook index.  numpy.histogram(..., bins=k)
        # must not be used here: without an explicit range it spreads its k
        # bins over [min(indices), max(indices)], so bin i would not
        # correspond to codebook entry i.
        hist = numpy.bincount(
            all_local_features_2d_codebook_indices, minlength=codebook_size)
    else:
        # Every local feature was rejected: no codeword occurs at all.
        hist = numpy.zeros(codebook_size, dtype=int)

    global_features_path_name_ext = global_features_path + img_name + ".mat"
    scipy.io.savemat(
        global_features_path_name_ext, {"global_features": hist}, oned_as="column")
# Run once locally on the first image before distributing the work.
compute_global_features(img_names[0])

# Connect to the IPython.parallel cluster and take a view on its engines.
client = IPython.parallel.Client()
directView = client[:]

# compute_global_features reads these names as globals, so each of them is
# pushed into the engines' namespaces before the function is mapped.
engine_globals = [
    ("local_features_base_path", local_features_base_path),
    ("global_features_path", global_features_path),
    ("theta_names", theta_names),
    ("num_thetas", num_thetas),
    ("local_feature_names", local_feature_names),
    ("local_feature_cluster_centroids", local_feature_cluster_centroids),
]
for engine_global_name, engine_global_value in engine_globals:
    directView[engine_global_name] = engine_global_value

# Process every image (including the first one again) on the engines.
asyncResult = directView.map_async(compute_global_features, img_names)

# Single-argument call form prints the same text on Python 2 and Python 3.
print(asyncResult.progress)
print(asyncResult.ready())
# Output recorded in the original listing: 22 True

# Collect the per-image return values; compute_global_features returns None,
# its results are the .mat files it writes.
results = asyncResult.get()
print(results)
[None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 
None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None]